---
title: "Citizen Journalist Analysis"
author: "Consuelo Amat, Jonathan Pinckney, and Jawhara Kanu"
date: "`r Sys.Date()`"
output: html_document
---

The following code executes all analysis using the event data based on citizen journalist reports for "Training Activists in Nonviolent Action Increases Self-Efficacy and Reduces State Violence," including Figures 1,3, and 4 in the main text of the article and several tables and figures from the appendix. Analysis was conducted using R 4.5.0. 

Running the code requires placing the articles' datasets in a subfolder titled "data." It also requires nine packages not included in base R. These packages and the version numbers used to run this analysis are indicated in the first code chunk of this markdown document.

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse) # v. 2.0.0
library(sf) # v. 1.0-21
library(boot) # v. 1.3-32
library(marginaleffects) # v. 0.29.0
library(sandwich) # v. 3.1-1
library(lmtest) # v. 0.9-40
library(cobalt) # v. 4.6.1
library(ggrepel) # v. 0.9.6

usip.yellow <- "#C19F53"
usip.blue <-  "#276383"
usip.dark.blue <- "#183B4B"
usip.gray <- "#949599"


usip.red.brown <- "#C18467"
usip.other.red.brown <- "#856466"
usip.gray.green <- "#68836E"
usip.light.gray <- "#ECE7DA"
usip.dark.teal <- "#36868D"
usip.light.teal <- "#98C3C8"

mean.fun <- function(data,indexes){ # Function for mean with removing NAs - to be used in bootstrapping 
  mean(data[indexes],na.rm = T)}

# Shapefiles for Mapping

all.countries <- st_read("data/shapefiles/ne_10m_admin_0_countries.shp")

sudan.states <- st_read("data/shapefiles/sdn_admbnda_adm1_cbs_nic_ssa_20200831.shp") # Sudan states shapefile

sudan.districts <- st_read("data/shapefiles/sdn_admbnda_adm2_cbs_nic_ssa_20200831.shp") # Sudan Districts Shapefile


# Cleaned Citizen Journalist Data

cj.data <- read_rds("data/citizen_journalist_events.rds")

treatment.control.districts <- read_rds("data/treatment_control.rds")


```



# Figure 1: Map of Treatment and Control Districts

```{r}
treatment.map <- left_join(sudan.districts,treatment.control.districts,by = "ADM2_PCODE") %>% 
  mutate(treat = if_else(is.na(treatment),"Outside Study",treatment))

ggplot() + 
  geom_sf(data = all.countries,fill = "antiquewhite") +
  geom_sf(data = treatment.map,aes(fill = as.factor(treat)),color = "gray10") +
  geom_sf(data = sudan.states,fill = "transparent",color = "black",linewidth = 0.75) +
  coord_sf(xlim = c(20,40),ylim = c(8,23)) +
  scale_fill_manual(name = NULL,values = c(usip.blue,usip.gray,usip.yellow),labels = c("Control","Outside Study","Treatment")) +
  theme_bw() +
  theme(panel.background = element_rect(fill = "lightblue"),
        panel.grid = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        plot.background = element_rect(fill = "transparent",color = "transparent"),
        plot.margin = unit(c(4, 1, 4, 1), "lines"),
        legend.position = "bottom",
        legend.text = element_text(size = 20)
  )

ggsave("treatment_control_map.pdf",width = 10,height = 10, dpi = 320)

```


# Figure 3: Bar Graph of Average Differences Across Treatment/Control

```{r}

nonstate.v <- cj.data %>%
  group_by(treatment) %>%
  do(data.frame(rbind(Hmisc::smean.cl.boot(.$nonstate.violence.bin)))) %>% 
  mutate(variable = "Nonstate\nViolence")


state.v <- cj.data %>%
  group_by(treatment) %>%
  do(data.frame(rbind(Hmisc::smean.cl.boot(.$state.violence.bin)))) %>% 
  mutate(variable = "State\nViolence")


issue.complexity <- cj.data %>%
  group_by(treatment) %>%
  do(data.frame(rbind(Hmisc::smean.cl.boot(.$more.than.1.issue)))) %>% 
  mutate(variable = "Multiple\nIssues")


concessions <- cj.data %>%
  group_by(treatment) %>%
  do(data.frame(rbind(Hmisc::smean.cl.boot(.$concessions)))) %>% 
  mutate(variable = "Concessions")


full.data <- bind_rows(nonstate.v,state.v,issue.complexity,concessions) %>% 
  filter(!(is.na(treatment)))

ggplot(full.data,aes(x = variable,y = Mean,fill = treatment)) + 
  geom_col(position = "dodge") +
  geom_errorbar(aes(ymin = Lower,ymax = Upper),position = position_dodge(0.9),width = 0.2) +
  scale_fill_manual(name = "",values = c(usip.blue,usip.yellow)) +
  scale_y_continuous(name = "Percent of Events",labels = scales::percent) +
  labs(x = "") +
  theme_bw() +
  theme(text = element_text(size = 24))

ggsave("cjdata_bar.pdf",width = 12,height = 8, dpi = 320)
  
```

# Figure 3: Coefficient Plots from Event-Level Analysis

Four logit models of binary measures of state violence, nonstate violence, more than one issue, and government concessions. Event begins post-treatment is the key independent variable. Controlling for pre-treatment protests per capita, violent events per capita, population density, and average night light emissions (control variables used to randomize into treatment and control). Standard errors clustered at the district level.

```{r}
dvs <- c("state.violence.bin","nonstate.violence.bin","more.than.1.issue","concessions")

cj.mods <- map(dvs, function(dv){
  glm(as.formula(paste(dv,"post.treatment + protests.pc.acled + pop_dens + night.light.avg + violence.pc.acled",sep = "~")), # Run logit model
                 data = cj.data,
                 family = binomial(link = "logit")) %>% 
    coeftest(vcov = vcovCL, # Replace SEs with clustered SEs at district level
             type = "HC1",
             cluster = ~ADM2_PCODE) %>%  
    bind_cols(.,confint(.)) %>% # Add 95% confidence intervals
    mutate(outcome = dv) # Specify DV
})

coefplot.data <- map(cj.mods, ~ slice(.,2)) %>% # Select coefficient for treatment
  bind_rows()

ggplot(coefplot.data,aes(Estimate,outcome)) +
  geom_point() +
  geom_errorbar(aes(xmin = `2.5 %`,xmax = `97.5 %`),width = 0.1) +
  geom_vline(xintercept = 0) +
  scale_y_discrete(labels = c("Concessions","Multiple Issues","Nonstate Violence","State Violence")) +
  labs(x = "Treatment Coefficient",y = "Dependent Variable") +
  theme_bw() +
  theme(text = element_text(size = 18))
  
  
ggsave("cjdata_coefplot.pdf",width = 12,height = 8, dpi = 320)
```
With control variables added, treatment significantly reduces the likelihood of state violence (at the event level) after treatment. No significant effect on likelihood of nonstate violence, multiple issues, or government concessions.

## Marginal Effects of Treatment

```{r}
mod.for.margins <- glm(state.violence.bin ~ post.treatment + protests.pc.acled + pop_dens + night.light.avg + violence.pc.acled,
                       data = cj.data,
                       family = binomial(link = "logit"))

prediction.data <- cj.data %>% 
  group_by(ADM2_PCODE) %>% 
  reframe(across(c(protests.pc.acled,pop_dens,night.light.avg,violence.pc.acled),mean.fun)) %>% 
  ungroup() %>% 
  reframe(across(c(protests.pc.acled,pop_dens,night.light.avg,violence.pc.acled),mean.fun),
          post.treatment = c(0,1))

predictions(mod.for.margins,
            newdata = datagrid(post.treatment = c(0,1)),
            type = "response")



```

# Appendix Tables and Figures

## Timeline of Workshops and Control Group Surveys

```{r}
survey.dates <- readxl::read_xlsx("data/07.30.21 SNAP Sudan survey dates tracker.xlsx") %>% 
  mutate(Date = mdy(`Survey date`),
         treat.numeric = if_else(...5 == "Treatment",1,0))

colnames(survey.dates)[c(3,5)] <- c("District","Treatment Status")



ggplot(survey.dates, aes(x = Date, y = treat.numeric,shape = `Treatment Status`,color = `Treatment Status`)) + 
  geom_point(size = 8) +
  geom_hline(yintercept = 1,color = "grey") +
  geom_hline(yintercept = 0,color = "grey") +
#  geom_text(data = filter(survey.dates,`Treatment Status` == "Control"), aes(label = District),
#                  vjust = 1,hjust = 1,angle = 45
#                  ) +
#  geom_text(data = filter(survey.dates,`Treatment Status` == "Treatment"), aes(label = District),
#                  vjust = -6,hjust = 1,angle = 45
#                  ) +
  scale_x_date(limits = c(mdy("01-01-2021"),mdy("09-01-2021")),
               breaks = seq(mdy("01-01-2021"),mdy("09-01-2021"),length.out = 5),
               labels = c("January 1\n2021","March 1\n2021","May 1\n2021","July 1\n2021","September 1\n2021")) +
  scale_y_continuous(breaks = NULL,limits = c(-0.5,1.5)) +
  scale_color_manual(values = c("Treatment" = usip.yellow,"Control" = usip.blue)) +
  labs(title = "Timeline of Treatment Workshops and Control Group Surveys", x = "", y = "") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        legend.position = "bottom",
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(size = 20),
        legend.text = element_text(size = 20),
        text = element_text(size = 22)
        )

ggsave("timeline.jpg",width = 13,height = 8,dpi = 320)
```


## Descriptive Statistics on Citizen Journalist Data

```{r}
summary.stats <- cj.data %>% 
  dplyr::select(state.violence.bin,nonstate.violence.bin,more.than.1.issue,concessions,pop_dens,night.light.avg,protests.pc.acled,violence.pc.acled) %>% 
  pivot_longer(cols = everything(),names_to = "Variable") %>%
  drop_na() %>% 
  group_by(Variable) %>% 
  summarize(n = n(),
            mean = mean(value,na.rm = T),
            max = max(value,na.rm = T),
            min = min(value,na.rm = T),
            sd = sd(value,na.rm = T)) %>% 
  mutate(Variable = c("Concessions","Multiple Issues","Night Light","Nonstate Violence","Population Density","Protests PC","State Violence","Violent Attacks PC"),
         across(is.numeric,~ round(.,digits = 4)))

write_csv(summary.stats,"cj-sumstats.csv")

```


## Balance Table

```{r}

balance.table <-bal.tab(treatment.control.districts[,15:18],treat = treatment.control.districts$treatment)[[1]] %>% 
  rownames_to_column(var = "variable") %>% 
  mutate(st.dev = map_dbl(variable, function(x){
    sd(treatment.control.districts[[x]],na.rm = T)
  }),
    p.values = map_dbl(variable, function(x){
    t.test(treatment.control.districts[[x]][treatment.control.districts$treatment=="Treatment"],treatment.control.districts[[x]][treatment.control.districts$treatment=="Control"])$p.value
  }))

```


